perm filename FFT.TIM[TIM,LSP]10 blob sn#747499 filedate 1984-03-19 generic text, type C, neo UTF8
COMMENT ⊗   VALID 00012 PAGES
C REC  PAGE   DESCRIPTION
C00001 00001
C00002 00002	 SAIL FFT done 10 times
C00006 00003	 Barrow's FFT times 
C00008 00004	 InterLisp-10
C00016 00005	∂12-May-82  0003	Mabry Tyson <Tyson at SRI-AI> 	UCI-Lisp timing on Barrow FFT
C00018 00006	Various machines via Barrow
C00020 00007	LM-2
C00021 00008	 FFT
C00022 00009	 FFT
C00023 00010	 NIL
C00026 00011	 InterLisp Vax 780
C00027 00012	 PSL-20
C00030 ENDMK
C⊗;
;;; SAIL FFT done 10 times
.r plisp

T 
(fasload fft)
42650. 
(timit)

Cpu Time = 4.392
Elapsed Time = 8.15
Wholine Time = 8.1333333
GC time = 1.463
Load Average Before = 0.32191658
Load Average After  = 0.361239552
NIL 
(timit)

Cpu Time = 4.391
Elapsed Time = 8.1333333
Wholine Time = 8.1333333
GC time = 1.458
Load Average Before = 0.356279135
Load Average After  = 0.39353633
NIL 

;;; Model B from Here on down
↑C
↑C

;;; FFT (Model B)
Cpu Time = 4.004
Elapsed Time = 23.9
Wholine Time = 9.8666667
GC time = 1.833
Load Average Before = 1.12649739
Load Average After  = 1.4469099

(fasload fft)
(fasload ttime fas dsk (mac lsp))
(timit)

Cpu Time = 4.0
Elapsed Time = 10.4166666
Wholine Time = 9.3833333
GC time = 2.914
Load Average Before = 0.296131015
Load Average After  = 0.35368061
NIL 

Cpu Time = 4.001
Elapsed Time = 10.3833333
Wholine Time = 9.4666667
GC time = 2.902
Load Average Before = 0.337160707
Load Average After  = 0.392315626
NIL 

Cpu Time = 4.0
Elapsed Time = 11.4833333
Wholine Time = 9.65
GC time = 2.906
Load Average Before = 0.380419016
Load Average After  = 0.451439977
NIL 


;;; SAIL (Model B) No Cache

(fasload fft)
(timit)

Cpu Time = 4.032
Elapsed Time = 1655.4
Wholine Time = 114.066667
GC time = 7.601
Load Average Before = 13.4277709
Load Average After  = 7.070505
NIL 


;;; SAIL (Model B) 1/2 cache (?)
(fasload fft)
(timit)

Cpu Time = 4.001
Elapsed Time = 17.95
Wholine Time = 16.3666666
GC time = 7.621
Load Average Before = 0.214488983
Load Average After  = 0.327040195
NIL 

Cpu Time = 3.996
Elapsed Time = 25.1833334
Wholine Time = 16.7166667
GC time = 7.63
Load Average Before = 0.286596775
Load Average After  = 0.51067257
NIL 

(timit)

Cpu Time = 4.004
Elapsed Time = 87.983334
Wholine Time = 22.2166667
GC time = 7.637
Load Average Before = 3.0016383
Load Average After  = 3.9317503
NIL 
 (alloc '(flonum (10000. 10000. 0.15))) 
(gc)
(timit)

Cpu Time = 4.006
Elapsed Time = 62.15
Wholine Time = 12.3
GC time = 2.375
Load Average Before = 4.1889869
Load Average After  = 4.731683
NIL 
 (alloc '(flonum (100000. 100000. 0.15))) 
(gc)
(timit)

Cpu Time = 4.005
Elapsed Time = 49.75
Wholine Time = 11.8166667
GC time = 2.376
Load Average Before = 4.5790273
Load Average After  = 4.72924495
NIL 
;;; Barrow's FFT times 
---------------------------------------------------------------
| Machine & |	 Interpreted  |   Compiled	| Interpreted |
| Language  |	Secs	Ratio |	Secs	Ratio	| /Compiled   |
---------------------------------------------------------------
|	    |		      |			|	      |
| 2060	    |	 28.7	1.0   |	 0.532	  1.0	|  53.9	      |
| Maclisp   |		      |			|	      |
|	    |		      |			|	      |
| LispM	    |	 97.2	3.39  |	 3.52	  6.62	|  27.6       |
| Zetalisp  |		      |			|	      |
|	    |		      |			|	      |
| Vax	    |	135.5	4.72  |	66.5	125.0	|   2.04      |
| Franzlisp |		      |			|	      |
|	    |		      |			|	      |
| 2060	    |	 37.3	1.30  | 12.6	 23.68	|   2.96      |
| Interlisp |		      |			|	      |
|	    |		      |			|	      |
| Dolphin   |	431.7	15.0  |	149.1	280.3	|   1.54      |
| Interlisp |		      |			|	      |
|	    |		      |			|	      |
---------------------------------------------------------------
-------
;;; InterLisp-10
∂07-May-82  1956	MASINTER at PARC-MAXC 	Interlisp-10 FFT timings   
Date:  7 MAY 1982 1956-PDT
From: MASINTER at PARC-MAXC
Subject: Interlisp-10 FFT timings
To:   RPG at SU-AI
cc:   masinter

Here are the Interlisp-10 times I got using a slight modification
of Barrow's original translation:

 Speed:     1.715 CPU seconds        Space:         1 large integers
           13.033 real seconds                  13318 floating numbers
            9.975 gc time                          39 page faults
load av=     .679 

With MINFS(20000 FLOATP)

 Speed:     1.691 CPU seconds        Space:         1 large integers
            4.190 real seconds                  13318 floating numbers
            1.917 gc time                          28 page faults
load av=     .724 

Here is the code. First, a "fast floating" package:

(FILECREATED " 7-May-82 19:50:45" <MASINTER>FELT..3 660    

     previous date: "30-Mar-82 00:30:40" <MASINTER>FELT..2)


(PRETTYCOMPRINT FELTCOMS)

(RPAQQ FELTCOMS [(MACROS * FELTMACROS)
		 (P (MOVD (QUOTE ELT)
			  (QUOTE FLELT))
		    (MOVD (QUOTE SETA)
			  (QUOTE FLSETA])

(RPAQQ FELTMACROS (FLELT FLSETA))
(DECLARE: EVAL@COMPILE 

(PUTPROPS FLELT MACRO [(A N)
	   (.FLOC. (VAG (OPENR (VAG (IPLUS (LOC A)
					   (ADD1 N])

(PUTPROPS FLSETA MACRO ((A N V)
			(CLOSER (IPLUS (LOC A)
				       (ADD1 N))
				(FLOAT V))))
)
(MOVD (QUOTE ELT)
      (QUOTE FLELT))
(MOVD (QUOTE SETA)
      (QUOTE FLSETA))
(DECLARE: DONTCOPY
  (FILEMAP (NIL)))
STOP


And then FFTI

(FILECREATED " 7-May-82 19:50:31" <MASINTER>FFTI.LSP.5 3390   

     previous date: " 7-May-82 19:45:09" <MASINTER>FFTI.LSP.4)


(PRETTYCOMPRINT FFTICOMS)

(RPAQQ FFTICOMS ((FNS * FFTIFNS)
		 (LOCALVARS . T)))

(RPAQQ FFTIFNS (FFT TRY))
(DEFINEQ

(FFT
  [LAMBDA (AREAL AIMAG)                         (* edited: 
						"30-Mar-82 00:25")
                                                (* Fast Fourier 
						Transform AREAL = real 
						part, AIMAG = imaginary 
						part)
    (PROG (AR AI PI I J K M N LE LE1 IP NV2 NM1 UR UI WR WI TR TI)
          (SETQ AR AREAL)                       (* Initialize)
          (SETQ AI AIMAG)
          (SETQ PI 3.141593)
          (SETQ N (ARRAYSIZE AR))
          (SETQ NV2 (IQUOTIENT N 2))
          (SETQ NM1 (SUB1 N))
          (SETQ M 0)                            (* Compute M = log 
						(N))
          (SETQ I 1)
      L1  (COND
	    ((ILESSP I N)
	      (SETQ M (ADD1 M))
	      (SETQ I (IPLUS I I))
	      (GO L1)))
          [COND
	    ((NOT (IEQP N (EXPT 2 M)))
	      (PRIN1 "Error ... array size not a power of two.")
	      (HELP)
	      (RETURN (TERPRI]
          (SETQ J 1)                            (* Interchange elements)
          (SETQ I 1)                            (* in bit-reversed 
						order)
      L3  (COND
	    ((ILESSP I J)
	      (SETQ TR (FLELT AR J))
	      (SETQ TI (FLELT AI J))
	      (FLSETA AR J (FLELT AR I))
	      (FLSETA AI J (FLELT AI I))
	      (FLSETA AR I TR)
	      (FLSETA AI I TI)))
          (SETQ K NV2)
      L6  (COND
	    ((ILESSP K J)
	      (SETQ J (IDIFFERENCE J K))
	      (SETQ K (IQUOTIENT K 2))
	      (GO L6)))
          (SETQ J (IPLUS J K))
          (SETQ I (ADD1 I))
          (COND
	    ((ILESSP I N)
	      (GO L3)))
          (for L from 1 to M
	     do                                 (* Loop thru stages)
		(SETQ LE (EXPT 2 L))
		(SETQ LE1 (IQUOTIENT LE 2))
		(SETQ UR 1.0)
		(SETQ UI 0.0)
		[SETQ WR (COS (FQUOTIENT PI (FLOAT LE1]
		[SETQ WI (SIN (FQUOTIENT PI (FLOAT LE1]
		(for J from 1 to LE1
		   do                           (* Loop thru 
						butterflies)
		      (for I←J by (IPLUS I LE) while (ILEQ I N)
			 do                     (* Do a butterfly)
			    (SETQ IP (IPLUS I LE1))
			    (SETQ TR (FDIFFERENCE (FTIMES (FLELT AR IP)
							  UR)
						  (FTIMES (FLELT AI IP)
							  UI)))
			    (SETQ TI (FPLUS (FTIMES (FLELT AR IP)
						    UI)
					    (FTIMES (FLELT AI IP)
						    UR)))
			    (FLSETA AR IP (FDIFFERENCE (FLELT AR I)
						       TR))
			    (FLSETA AI IP (FDIFFERENCE (FLELT AI I)
						       TI))
			    (FLSETA AR I (FPLUS (FLELT AR I)
						TR))
			    (FLSETA AI I (FPLUS (FLELT AI I)
						TI)))
		      (SETQ TR (FDIFFERENCE (FTIMES UR WR)
					    (FTIMES UI WI)))
		      (SETQ TI (FPLUS (FTIMES UR WI)
				      (FTIMES UI WR)))
		      (SETQ UR TR)
		      (SETQ UI TI)))
          (RETURN T])

(TRY
  [LAMBDA (SIZE)                                (* edited: 
						"30-Mar-82 00:26")
    (COND
      ((NULL SIZE)
	(SETQ SIZE 1024)))
    (SETQ RE (ARRAY SIZE (QUOTE FLOATP)))
    (SETQ IM (ARRAY SIZE (QUOTE FLOATP)))
    (for I from 1 to SIZE do (FLSETA RE I (FLOAT 0))
			     (FLSETA IM I (FLOAT 0)))
    (TIME (FFT RE IM)
	  1])
)
(DECLARE: DOEVAL@COMPILE DONTCOPY

(LOCALVARS . T)
)
(DECLARE: DONTCOPY
  (FILEMAP (NIL (248 3309 (FFT 260 . 2954) (TRY 2958 . 3306)))))
STOP

∂12-May-82  0003	Mabry Tyson <Tyson at SRI-AI> 	UCI-Lisp timing on Barrow FFT
Date: 11 May 1982 2354-PDT
From: Mabry Tyson <Tyson at SRI-AI>
Subject: UCI-Lisp timing on Barrow FFT
To: rpg at SU-AI

Here are the results for the Barrow FFT benchmark for UCI-Lisp (version
from UTexas, run on SRI-AI 2060).

Notes for this benchmark: UCI-Lisp does not have an ARRAYCALL function.  I
replaced the ARRAYCALL's by calls directly to the arrays.  Also, UCI-Lisp does
not have the size of an array on the property list of the array name so I
added a parameter to FFT that is the size of the array.  The arithmetic
functions COS, etc are not normally included in UCI-Lisp and are loaded from a
Fortran library by means of the loader.  This existed in the original UCI-Lisp
(1973) and RUCI-Lisp but I don't know offhand if it is distributed with
Meehan's version.

			Time w/o GC	GC	Total time
Interpreted		  16.130	1.829	17.959
Compiled, slow links	   6.739	1.815	 8.554
Compiled, fast links	   3.334	1.779	 5.113
-------

Various machines via Barrow
---------------------------------------------------------------
| Machine & |	 Interpreted  |   Compiled	| Interpreted |
| Language  |	Secs	Ratio |	Secs	Ratio	| /Compiled   |
---------------------------------------------------------------
|	    |		      |			|	      |
| 2060	    |	 28.7	1.0   |	 0.532	  1.0	|  53.9	      |
| Maclisp   |		      |			|	      |
|	    |		      |			|	      |
| LispM	    |	 97.2	3.39  |	 3.52	  6.62	|  27.6       |
| Zetalisp  |		      |			|	      |
|	    |		      |			|	      |
| Vax	    |	135.5	4.72  |	66.5	125.0	|   2.04      |
| Franzlisp |		      |			|	      |
|	    |		      |			|	      |
| 2060	    |	 37.3	1.30  | 12.6	 23.68	|   2.96      |
| Interlisp |		      |			|	      |
|	    |		      |			|	      |
| Dolphin   |	431.7	15.0  |	149.1	280.3	|   1.54      |
| Interlisp |		      |			|	      |
|	    |		      |			|	      |
---------------------------------------------------------------
-------

;;;LM-2
;; 10. FFT

;;; Sets up the two arrays
;---
(SETQ RE (FSYMEVAL (ARRAY RE FLONUM 1025.)))

(SETQ IM (FSYMEVAL (ARRAY IM FLONUM 1025.)))

(DEFUN TEST-FFT ()
  (TIMING "FFT" (LOOP REPEAT 10. DO (FFT 'RE 'IM))))

;; Compiled:  36.8 seconds.
;; Array references compile OK in this test.

;;; FFT
D3
Normal CL arrays
Clean
CPU	28.2
GC	3.08

Loaded
CPU	32.3
GC	3.23

Fast CL arrays
Clean
CPU	28.0
GC	3.08

Loaded
CPU	32.1
GC	3.24

Interlisp Arrays
Clean
CPU	30.4
GC	2.99

Loaded
CPU	34.3
GC	2.32

D3
1/25/84 with interrupts
CPU	1.57
GC	3.03
;;; FFT
D2
Normal CL arrays
Clean
CPU	15.6
GC	21.6

Loaded
CPU	17.6
GC	22.6

Fast CL arrays
Clean
CPU	13.2
GC	21.7

Loaded
CPU	15.0
GC	22.8

Interlisp Arrays
Clean
CPU	31.3
GC	21.2

Loaded
CPU	32.5
GC	22.0

D1
1/25/84 with interrupts CMLArrays
Elapsed	55.0
Swap	  .61
CPU	44.2
GC	10.1
;;; NIL
FFT

Removed binding/init of PI.  PI is a DEFCONSTANT constant in NIL.

I hate this one.  I've played with it before.  I will give two
different ways of doing it.

Version A:  using an array of :element-type double-float.  This is a
joke in the current nil, because every access will have to cons
because there is no way to declare such an access.  The access is done
with AREF (= old nil VREF when one dimensional).  I'm including this
for joke value, sort of.  Probably most of the time is spent flonum-
consing and paging in that memory.
cpu=221.21,elapsed=220.86,pagefaults=17134

Version B:  use a simple [general] vector which happens to contain
flonums, and access it with SGVREF (remember that?).
cpu-35.59,elapsed=38.17,pagefaults=5674

Version C (if i get the energy) will use just an untyped 1-dimensional
array and AREF.  The vector will of course be a simple general vector,
but it will be using generic vector referencing.

In all three cases, the following happens.  Nested open-compiled
flonum functions do not cons intermediate results.  However, computed
flonums put anywhere, including passing them as arguments to all but a
select few functions (of which i think COS and SIN are examples), will
cons them.  Saving as local variables conses also.
Note also we are using double-floats here (all we support
at the moment):  64 bit vax d←floats.

There is one additional bum i am NOT using, because i do not have
"nice" semantics to use it.  That is, FLOAT of a fixnum can be
open-compiled (we have an IFLOAT primitive, but it's supposedly not
"public").  This would turn into a single vax instruction, and also
eliminate a couple flonum conses as described above.
;;; InterLisp Vax 780

FFT:
←(TIME (FFT RE IM))
4 conses
27.488 seconds
T

FFFT:
←(TIME (FFFT RE IM))
3 conses
26.192 seconds
T

SFFT:
←(TIME (SFFT ARE AIM]
4 conses
31.744 seconds
T
;;; PSL-20

 4:04:54 USER	FFT Test
 4:04:54 USER	"FFT Test"
 4:04:54 USER	
 4:04:54 USER	Timing performed on DEC-20
 4:04:54 USER	11-Mar-84 04:04:54 .
 4:04:54 USER	*** Garbage collection starting
 4:04:55 USER	*** GC 3: time 1006 ms, 58750 recovered, 251120 free
 4:05:01 USER	*** Garbage collection starting
 4:05:03 USER	*** GC 4: time 1789 ms, 244956 recovered, 244958 free
 4:05:09 USER	*** Garbage collection starting
 4:05:11 USER	*** GC 5: time 1582 ms, 244950 recovered, 244952 free
 4:05:17 USER	*** Garbage collection starting
 4:05:18 USER	*** GC 6: time 1433 ms, 244950 recovered, 244952 free
 4:05:24 USER	*** Garbage collection starting
 4:05:26 USER	*** GC 7: time 1656 ms, 244950 recovered, 244952 free
 4:05:31 USER	*** Garbage collection starting
 4:05:33 USER	*** GC 8: time 1708 ms, 244956 recovered, 244958 free
 4:05:38 USER	*** Garbage collection starting
 4:05:40 USER	*** GC 9: time 1326 ms, 244950 recovered, 244952 free
 4:05:45 USER	*** Garbage collection starting
 4:05:48 USER	*** GC 10: time 1576 ms, 244956 recovered, 244958 free
 4:05:48 USER	
 4:05:48 USER	Cpu (- GC) Time = 35.517 secs
 4:05:48 USER	Elapsed Time = 53.0 secs
 4:05:48 USER	Wholine Time = 0.0
 4:05:48 USER	GC Time = 11.07 secs
 4:05:48 USER	Load Average Before  = 1.1
 4:05:48 USER	Load Average After   = 1.1
 4:05:48 USER	Average Load Average = 1.1